{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 首先 import 必要的模块\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "from matplotlib import pyplot\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "读取数据&数据探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pregnancies</th>\n",
       "      <th>Glucose</th>\n",
       "      <th>BloodPressure</th>\n",
       "      <th>SkinThickness</th>\n",
       "      <th>Insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>DiabetesPedigreeFunction</th>\n",
       "      <th>Age</th>\n",
       "      <th>Outcome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>148</td>\n",
       "      <td>72</td>\n",
       "      <td>35</td>\n",
       "      <td>0</td>\n",
       "      <td>33.6</td>\n",
       "      <td>0.627</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>85</td>\n",
       "      <td>66</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>0.351</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>183</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23.3</td>\n",
       "      <td>0.672</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "      <td>66</td>\n",
       "      <td>23</td>\n",
       "      <td>94</td>\n",
       "      <td>28.1</td>\n",
       "      <td>0.167</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>137</td>\n",
       "      <td>40</td>\n",
       "      <td>35</td>\n",
       "      <td>168</td>\n",
       "      <td>43.1</td>\n",
       "      <td>2.288</td>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \\\n",
       "0            6      148             72             35        0  33.6   \n",
       "1            1       85             66             29        0  26.6   \n",
       "2            8      183             64              0        0  23.3   \n",
       "3            1       89             66             23       94  28.1   \n",
       "4            0      137             40             35      168  43.1   \n",
       "\n",
       "   DiabetesPedigreeFunction  Age  Outcome  \n",
       "0                     0.627   50        1  \n",
       "1                     0.351   31        0  \n",
       "2                     0.672   32        1  \n",
       "3                     0.167   21        0  \n",
       "4                     2.288   33        1  "
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读取数据\n",
    "data = pd.read_csv(\"diabetes.csv\")\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 768 entries, 0 to 767\n",
      "Data columns (total 9 columns):\n",
      "Pregnancies                 768 non-null int64\n",
      "Glucose                     768 non-null int64\n",
      "BloodPressure               768 non-null int64\n",
      "SkinThickness               768 non-null int64\n",
      "Insulin                     768 non-null int64\n",
      "BMI                         768 non-null float64\n",
      "DiabetesPedigreeFunction    768 non-null float64\n",
      "Age                         768 non-null int64\n",
      "Outcome                     768 non-null int64\n",
      "dtypes: float64(2), int64(7)\n",
      "memory usage: 54.1 KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pregnancies</th>\n",
       "      <th>Glucose</th>\n",
       "      <th>BloodPressure</th>\n",
       "      <th>SkinThickness</th>\n",
       "      <th>Insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>DiabetesPedigreeFunction</th>\n",
       "      <th>Age</th>\n",
       "      <th>Outcome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>3.845052</td>\n",
       "      <td>120.894531</td>\n",
       "      <td>69.105469</td>\n",
       "      <td>20.536458</td>\n",
       "      <td>79.799479</td>\n",
       "      <td>31.992578</td>\n",
       "      <td>0.471876</td>\n",
       "      <td>33.240885</td>\n",
       "      <td>0.348958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>3.369578</td>\n",
       "      <td>31.972618</td>\n",
       "      <td>19.355807</td>\n",
       "      <td>15.952218</td>\n",
       "      <td>115.244002</td>\n",
       "      <td>7.884160</td>\n",
       "      <td>0.331329</td>\n",
       "      <td>11.760232</td>\n",
       "      <td>0.476951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.078000</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>62.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>27.300000</td>\n",
       "      <td>0.243750</td>\n",
       "      <td>24.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>3.000000</td>\n",
       "      <td>117.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>30.500000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>0.372500</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.000000</td>\n",
       "      <td>140.250000</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>127.250000</td>\n",
       "      <td>36.600000</td>\n",
       "      <td>0.626250</td>\n",
       "      <td>41.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>17.000000</td>\n",
       "      <td>199.000000</td>\n",
       "      <td>122.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>846.000000</td>\n",
       "      <td>67.100000</td>\n",
       "      <td>2.420000</td>\n",
       "      <td>81.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Pregnancies     Glucose  BloodPressure  SkinThickness     Insulin  \\\n",
       "count   768.000000  768.000000     768.000000     768.000000  768.000000   \n",
       "mean      3.845052  120.894531      69.105469      20.536458   79.799479   \n",
       "std       3.369578   31.972618      19.355807      15.952218  115.244002   \n",
       "min       0.000000    0.000000       0.000000       0.000000    0.000000   \n",
       "25%       1.000000   99.000000      62.000000       0.000000    0.000000   \n",
       "50%       3.000000  117.000000      72.000000      23.000000   30.500000   \n",
       "75%       6.000000  140.250000      80.000000      32.000000  127.250000   \n",
       "max      17.000000  199.000000     122.000000      99.000000  846.000000   \n",
       "\n",
       "              BMI  DiabetesPedigreeFunction         Age     Outcome  \n",
       "count  768.000000                768.000000  768.000000  768.000000  \n",
       "mean    31.992578                  0.471876   33.240885    0.348958  \n",
       "std      7.884160                  0.331329   11.760232    0.476951  \n",
       "min      0.000000                  0.078000   21.000000    0.000000  \n",
       "25%     27.300000                  0.243750   24.000000    0.000000  \n",
       "50%     32.000000                  0.372500   29.000000    0.000000  \n",
       "75%     36.600000                  0.626250   41.000000    1.000000  \n",
       "max     67.100000                  2.420000   81.000000    1.000000  "
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 各属性的统计特性\n",
    "data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAE+JJREFUeJzt3X+0ZWV93/H3BwbUoHH4MbIow2SwjmnJUpGMFEOrEW0q2jJIxcSyZIKzOsmqURLS1NGkksTYaCJiqC3trGAYLIUgFRmUxkxHwBUTKDMoIKJlpARmYZkB+aUULPDtH+e5cBj23LtnhnPP4d73a62zzt7Pefa53+sa7se9n72fJ1WFJEk72mvcBUiSJpMBIUnqZEBIkjoZEJKkTgaEJKmTASFJ6mRASJI6GRCSpE4GhCSp04JxF7AnDjrooFq6dOm4y5Ck55XNmzffW1WLZur3vA6IpUuXsmnTpnGXIUnPK0n+tk8/LzFJkjoZEJKkTgaEJKmTASFJ6mRASJI6jTQgktyR5OYk30yyqbUdkGRDktva+/6tPUnOSbIlyU1JjhplbZKk6c3GGcSbqurIqlre9tcAG6tqGbCx7QMcDyxrr9XAubNQmyRpJ8ZxiWkFsK5trwNOHGq/oAauBRYmOWQM9UmSGH1AFPCXSTYnWd3aDq6q7wO095e19kOBu4aO3draJEljMOonqY+tqruTvAzYkOQ70/RNR1s9q9MgaFYDLFmyZI8L/NnfumCPv0Nzz+Y/PnXcJUhjN9IziKq6u71vAy4Djgbumbp01N63te5bgcOGDl8M3N3xnWuranlVLV+0aMapRCRJu2lkAZFkvyQvmdoGfgH4FrAeWNm6rQQub9vrgVPb3UzHAA9OXYqSJM2+UV5iOhi4LMnUz/mvVfUXSa4HLkmyCrgTOLn1vxJ4G7AFeAQ4bYS1SZJmMLKAqKrbgdd0tN8HvLmjvYD3jaoeSdKu8UlqSVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdRh4QSfZO8o0kX2r7hye5LsltSf48yb6t/QVtf0v7fOmoa5Mk7dxsnEGcDtw6tP8J4OyqWgbcD6xq7auA+6vqFcDZrZ8kaUxGGhBJFgNvB/607Qc4Dri0dVkHnNi2V7R92udvbv0lSWMw6jOITwP/Bniy7R8IPFBVj7f9rcChbftQ4C6A9vmDrb8kaQxmDIgk+yXZq22/MskJSfbpcdw/BbZV1ebh5o6u1eOz4e9dnWRTkk3bt2+fqQxJ0m7qcwbxNeCFSQ4FNgKnAef3OO5Y4IQkdwAXM7i09GlgYZIFrc9i4O62vRU4DKB9/lLgBzt+aVWtrarlVbV80aJFPcqQJO2OPgGRqnoEOAn491X1DuCImQ6qqg9V1eKqWgr8EvDVqjoFuAp4Z+u2Eri8ba9v+7TPv1pVzzqDkCTNjl4BkeT1wCnAl1vbgmn6z+SDwBlJtjAYYzivtZ8HHNjazwDW7MHPkCTtoT5/6H8d+BBwWVXdkuTlDM4Cequqq4Gr2/btwNEdfR4FTt6V75Ukjc6MAVFV1wDXJNmv7d8OfGDUhUmSxqvPXUyvT/Jt2sNuSV6T5D+OvDJJ0lj1GYP4NPBPgPsAqupG4A2jLEqSNH69HpSrqrt2aHpiBLVIkiZIn0Hqu5L8HFBtYr0P8My5lSRJc1CfM4hfBd7HYCqMrcCRbV+SNIf1uYvpXgbPQEiS5pE+dzGtS7JwaH//JJ8dbVmSpHHrc4np1VX1wNROVd0PvHZ0JUmSJkGfgNgryf5TO0kOYM+m2pAkPQ/0+UN/
FvDXSaYW+TkZ+NjoSpIkTYI+g9QXJNkMvInBmg0nVdW3R16ZJGms+l4q+g6D9aMXACRZUlV3jqwqSdLYzRgQSd4PnAncw+AJ6jBY6e3Voy1NkjROfc4gTgd+uqruG3UxkqTJ0ecupruAB0ddiCRpsvQ5g7gduDrJl4HHphqr6lMjq0qSNHZ9AuLO9tq3vSRJ80Cf21x/DyDJflX1o9GXJEmaBK4oJ0nq5IpykqROrignSerkinKSpE6uKCdJ6jTtGUSSvYH3VJUryknSPDPtGURVPQGsmKVaJEkTpM8YxNeTfAb4c+Cp5yCq6oaRVSVJGrs+AfFz7f33h9oKOO65L0eSNClmGoPYCzi3qi6ZpXokSRNipjGIJ4Ffm6VaJEkTpM9trhuS/OskhyU5YOo18sokSWPVZwzive19+NmHAl7+3JcjSZoUfWZzPXw2CpEkTZY+a1Kf2tVeVRfMcNwLga8BL2g/59KqOjPJ4cDFwAHADQwexPtxkhcAFwA/y2BiwF+sqjt24XeRJD2H+oxBvG7o9Y+A3wVO6HHcY8BxVfUaBtNzvDXJMcAngLOrahlwP7Cq9V8F3F9VrwDObv0kSWPS5xLT+4f3k7wU+FyP4wr4Ydvdp72mnp/4F619HYPAOZfBE9u/29ovBT6TJO17JEmzrNd03zt4BFjWp2OSvZN8E9gGbAC+BzxQVY+3LlsZTAJIe78LoH3+IHDgbtQnSXoO9BmDuILB//OHQaAcAfR6cK7N5XRkkoXAZcDf7+o29aOm+Wy4ntXAaoAlS5b0KUOStBv63Ob6yaHtx4G/raqtu/JDquqBJFcDxwALkyxoZwmLgbtbt63AYcDWJAuAlwI/6PiutcBagOXLl3v5SZJGpM8lpjuB66rqmqr6OnBfkqUzHZRkUTtzIMmLgLcwWGjoKuCdrdtK4PK2vb7t0z7/quMPkjQ+fQLi88CTQ/tPtLaZHAJcleQm4HpgQ1V9CfggcEaSLQzGGM5r/c8DDmztZwBr+v0KkqRR6HOJaUFV/Xhqpz2zsO9MB1XVTcBrO9pvB47uaH8UOLlHPZKkWdDnDGJ7kqeee0iyArh3dCVJkiZBnzOIXwUubIsGwWAwufPpaknS3NHnQbnvAcckeTGQqnp49GVJksZtxktMSf5dkoVV9cOqejjJ/kn+YDaKkySNT58xiOOr6oGpnaq6H3jb6EqSJE2CPgGxd5tpFXjqmYYXTNNfkjQH9Bmk/i/AxiR/xmDqi/cymGRPkjSH9Rmk/qP2sNtbWtNHq+oroy1LkjRufc4gAL7B09N1f2N05Uiacufvv2rcJWgCLfnIzbP2s/rcxfQu4H8ymB/pXcB1Sd45/VGSpOe7PmcQvw28rqq2wWASPuB/MFjUR5I0R/W5i2mvqXBo7ut5nCTpeazPGcRfJPkKcFHb/0XgytGVJEmaBH3uYvqtJCcB/5DBqm9rq+qykVcmSRqrXncxVdUXgC+MuBZJ0gRxLEGS1MmAkCR12mlAJNnY3j8xe+VIkibFdGMQhyR5I3BCkosZDFA/papuGGllkqSxmi4gPgKsARYDn9rhswKOG1VRkqTx22lAVNWlwKVJ/m1VfXQWa5IkTYA+z0F8NMkJwBta09VV9aXRliVJGrc+k/X9IXA68O32Or21SZLmsD4Pyr0dOLKqngRIso7BlN8fGmVhkqTx6vscxMKh7ZeOohBJ0mTpcwbxh8A3klzF4FbXN+DZgyTNeX0GqS9KcjXwOgYB8cGq+j+jLkySNF59J+v7PrB+xLVIkiaIczFJkjoZEJKkTtMGRJK9knxrtoqRJE2OaQOiPftwY5Ils1SPJGlC9LnEdAhwS5KNSdZPvWY6KMlhSa5KcmuSW5Kc3toPSLIhyW3tff/WniTnJNmS5KYkR+3ZryZJ2hN97mL6vd387seB36yqG5K8BNicZAPwy8DGqvp4kjUMZoz9IHA8sKy9/gFwbnuXJI3BjGcQVXUNcAewT9u+HphxLYiq+v7UmhFV
9TBwK3AosAJY17qtA05s2yuAC2rgWmBhkkN27deRJD1X+kzW9y+BS4H/3JoOBb64Kz8kyVLgtcB1wMHtuYqp5yteNvS9dw0dtrW1SZLGoM8YxPuAY4GHAKrqNp7+oz6jJC8G/hvw61X10HRdO9qq4/tWJ9mUZNP27dv7liFJ2kV9AuKxqvrx1E6SBXT84e6SZB8G4XBhVX2hNd8zdemovW9r7VuBw4YOXwzcveN3VtXaqlpeVcsXLVrUpwxJ0m7oExDXJPkw8KIk/xj4PHDFTAclCXAecGtVDS9Zuh5Y2bZXApcPtZ/a7mY6Bnhw6lKUJGn29bmLaQ2wCrgZ+BXgSuBPexx3LPAe4OYk32xtHwY+DlySZBVwJ3By++xK4G3AFuAR4LSev4MkaQT6zOb6ZFsk6DoGl5a+W1UzXmKqqr+ie1wB4M0d/YvBeIckaQLMGBBJ3g78J+B7DP7gH57kV6rqv4+6OEnS+PS5xHQW8Kaq2gKQ5O8CXwYMCEmaw/oMUm+bCofmdp6+80iSNEft9AwiyUlt85YkVwKXMBiDOJnB09SSpDlsuktM/2xo+x7gjW17O7D/yCqSJE2EnQZEVXmbqSTNY33uYjoceD+wdLh/VZ0wurIkSePW5y6mLzJ4IvoK4MnRliNJmhR9AuLRqjpn5JVIkiZKn4D4kyRnAn8JPDbVOLXWgyRpbuoTEK9iMKfScTx9ianaviRpjuoTEO8AXj485bckae7r8yT1jcDCURciSZosfc4gDga+k+R6njkG4W2ukjSH9QmIM0dehSRp4vRZD+Ka2ShEkjRZ+jxJ/TBPr0G9L7AP8KOq+slRFiZJGq8+ZxAvGd5PciJw9MgqkiRNhD53MT1DVX0Rn4GQpDmvzyWmk4Z29wKW8/QlJ0nSHNXnLqbhdSEeB+4AVoykGknSxOgzBuG6EJI0D0235OhHpjmuquqjI6hHkjQhpjuD+FFH237AKuBAwICQpDlsuiVHz5raTvIS4HTgNOBi4KydHSdJmhumHYNIcgBwBnAKsA44qqrun43CJEnjNd0YxB8DJwFrgVdV1Q9nrSpJ0thN96DcbwJ/B/gd4O4kD7XXw0kemp3yJEnjMt0YxC4/ZS1JmjsMAUlSJwNCktTJgJAkdTIgJEmdRhYQST6bZFuSbw21HZBkQ5Lb2vv+rT1JzkmyJclNSY4aVV2SpH5GeQZxPvDWHdrWABurahmwse0DHA8sa6/VwLkjrEuS1MPIAqKqvgb8YIfmFQyeyKa9nzjUfkENXAssTHLIqGqTJM1stscgDq6q7wO095e19kOBu4b6bW1tz5JkdZJNSTZt3759pMVK0nw2KYPU6WjrXLWuqtZW1fKqWr5o0aIRlyVJ89dsB8Q9U5eO2vu21r4VOGyo32Lg7lmuTZI0ZLYDYj2wsm2vBC4faj+13c10DPDg1KUoSdJ49FmTerckuQj4eeCgJFuBM4GPA5ckWQXcCZzcul8JvA3YAjzCYN0JSdIYjSwgqurdO/nozR19C3jfqGqRJO26SRmkliRNGANCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnQwISVInA0KS1MmAkCR1MiAkSZ0MCElSJwNCktTJgJAkdTIgJEmdDAhJUicDQpLUyYCQJHUyICRJnSYqIJK8Ncl3k2xJsmbc9UjSfDYxAZFkb+A/AMcDRwDvTnLEeKuSpPlrYgICOBrYUlW3V9WPgYuBFWOuSZLmrUkKiEOBu4b2t7Y2SdIYLBh3AUPS0VbP6pSsBla33R8m+e5Iq5pfDgLuHXcRkyCfXDnuEvRM/tuccmbXn8pd9lN9Ok1SQGwFDhvaXwzcvWOnqloLrJ2touaTJJuqavm465B25L/N8ZikS0zXA8uSHJ5kX+CXgPVjrkmS5q2JOYOoqseT/BrwFWBv4LNV
dcuYy5KkeWtiAgKgqq4Erhx3HfOYl+40qfy3OQapetY4sCRJEzUGIUmaIAaEnOJEEyvJZ5NsS/KtcdcyHxkQ85xTnGjCnQ+8ddxFzFcGhJziRBOrqr4G/GDcdcxXBoSc4kRSJwNCvaY4kTT/GBDqNcWJpPnHgJBTnEjqZEDMc1X1ODA1xcmtwCVOcaJJkeQi4G+An06yNcmqcdc0n/gktSSpk2cQkqROBoQkqZMBIUnqZEBIkjoZEJKkTgaE5r0ki5NcnuS2JN9L8iftmZDpjvnwbNUnjYsBoXktSYAvAF+sqmXAK4EXAx+b4VADQnOeAaH57jjg0ar6M4CqegL4DeC9Sf5Vks9MdUzypSQ/n+TjwIuSfDPJhe2zU5PclOTGJJ9rbT+VZGNr35hkSWs/P8m5Sa5KcnuSN7Z1D25Ncv7Qz/uFJH+T5IYkn0/y4ln7X0XCgJB+Btg83FBVDwF3spM126tqDfB/q+rIqjolyc8Avw0cV1WvAU5vXT8DXFBVrwYuBM4Z+pr9GYTTbwBXAGe3Wl6V5MgkBwG/A7ylqo4CNgFnPBe/sNRX538A0jwSumev3Vl7l+OAS6vqXoCqmlq/4PXASW37c8AfDR1zRVVVkpuBe6rqZoAktwBLGUyaeATw9cFVMPZlMOWENGsMCM13twD/fLghyU8ymOH2QZ55lv3CnXxH3zAZ7vNYe39yaHtqfwHwBLChqt7d43ulkfASk+a7jcBPJDkVnlqC9SwGS13eDhyZZK8khzFYfW/K/0uyz9B3vCvJge07Dmjtf81gdlyAU4C/2oW6rgWOTfKK9p0/keSVu/rLSXvCgNC8VoPZKt8BnJzkNuB/AY8yuEvp68D/Bm4GPgncMHToWuCmJBe22W8/BlyT5EbgU63PB4DTktwEvIenxyb61LUd+GXgonb8tcDf293fU9odzuYqSerkGYQkqZMBIUnqZEBIkjoZEJKkTgaEJKmTASFJ6mRASJI6GRCSpE7/H6faJrOoDn8hAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#看各类样本分布是否均衡\n",
    "sns.countplot(data.Outcome);\n",
    "pyplot.xlabel('Outcome');\n",
    "pyplot.ylabel('Number of occurrences');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "数据划分与预处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(154, 8)\n",
      "(614, 8)\n"
     ]
    }
   ],
   "source": [
    "# 用train_test_split随机划分训练集与测试集\n",
    "y_data=data['Outcome']\n",
    "X_data=data.drop([\"Outcome\"], axis=1)\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size = 0.2,random_state = 0)\n",
    "print(X_test.shape)\n",
    "print(X_train.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 数据标准化\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# 初始化特征的标准化器\n",
    "ss_X = StandardScaler()\n",
    "\n",
    "# 分别对训练和测试数据的特征进行标准化处理\n",
    "X_train = ss_X.fit_transform(X_train)\n",
    "X_test = ss_X.transform(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "模型训练"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1、Default Logistic Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of each fold is:  [0.50641233 0.45398684 0.47245305 0.58502473 0.46968588]\n",
      "cv logloss is: 0.49751256558230594\n"
     ]
    }
   ],
   "source": [
    "# 交叉验证用于评估模型性能和进行参数调优（模型选择）\n",
    "# 分类任务中交叉验证缺省是采用StratifiedKFold\n",
    "from sklearn.cross_validation import cross_val_score\n",
    "lr=LogisticRegression()\n",
    "loss = cross_val_score(lr, X_train, y_train, cv=5, scoring='neg_log_loss')\n",
    "print 'logloss of each fold is: ',-loss\n",
    "print'cv logloss is:', -loss.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Classification report for classifier LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
      "          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
      "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
      "          verbose=0, warm_start=False):\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "          0       0.84      0.92      0.88       107\n",
      "          1       0.76      0.62      0.68        47\n",
      "\n",
      "avg / total       0.82      0.82      0.82       154\n",
      "\n",
      "\n",
      "Confusion matrix:\n",
      "[[98  9]\n",
      " [18 29]]\n"
     ]
    }
   ],
   "source": [
    "#在测试集上测试，估计模型性能\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "lr.fit(X_train, y_train)\n",
    "y_test_pred_lr = lr.predict(X_test)\n",
    "\n",
    "print(\"Classification report for classifier %s:\\n%s\\n\"\n",
    "      % (lr, classification_report(y_test, y_test_pred_lr)))\n",
    "print(\"Confusion matrix:\\n%s\" % confusion_matrix(y_test, y_test_pred_lr))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2、正则化的 Logistic Regression及参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.4968045791337133\n",
      "{'penalty': 'l2', 'C': 0.23357214690901223}\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "#需要调优的参数\n",
    "penaltys = ['l1','l2']\n",
    "Cs = np.logspace(-1,0,20)\n",
    "tuned_parameters = dict(penalty = penaltys, C = Cs)\n",
    "\n",
    "#weight={0:0.5,1:0.5} 尝试过调整类别权重，或为“balanced”,发现权重相同也即无权重时分类效果最好\n",
    "lr_penalty= LogisticRegression(\n",
    "                                #class_weight=weight\n",
    "                                )\n",
    "grid= GridSearchCV(lr_penalty, tuned_parameters,cv=5, scoring='neg_log_loss')\n",
    "grid.fit(X_train,y_train)\n",
    "\n",
    "# examine the best model\n",
    "print(-grid.best_score_)\n",
    "print(grid.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "#grid.cv_results_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZQAAAEKCAYAAAA1qaOTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XmcFPWZ+PHP08fcM8xwDcghoBDkRomoKGokwSNR0UjciAeryybqL9lcG6ObjUY3i8lqLpP1DKJuoiaGiMYYhYgoXkEjoqCgoAgMh8AwM8zZXc/vj6ruOeiZ6empnp7jeSf1qqpvfb/VT7VMPV3Xt0RVMcYYYzorkOkAjDHG9A6WUIwxxvjCEooxxhhfWEIxxhjjC0soxhhjfGEJxRhjjC8soRhjjPGFJRRjjDG+sIRijDHGF6FMB9CVBg4cqKNGjcp0GMYY06O8/vrrn6jqoPbq9amEMmrUKNauXZvpMIwxpkcRkY+SqWenvIwxxvjCEooxxhhfWEIxxhjjiz51DcUY03c1NDSwfft2amtrMx1Kt5WTk8Pw4cMJh8MptbeEYozpE7Zv305hYSGjRo1CRDIdTrejquzbt4/t27czevTolNZhp7yMMX1CbW0tAwYMsGTSChFhwIABnTqCs4RijOkzLJm0rbPfjyUUY4xpxZfuepkv3fVypsPoMSyhGGNMFykoKIhPn3nmmRQXF/P5z38+Yd1rrrmGadOmMWHCBHJzc5k2bRrTpk3jD3/4Q4c+84nnn+D+x+7vTNhJs4vyxhjThtpoLVsPbmV0v9QuVG89uBXgsPbf+c53qK6u5q677krY7le/+hUAL7z1AlddfBVvvvlmSp//zlvv8N6G97jiwitSat8RdoRijOnWFj69kIVPL8xY+3Q544wzKCwsbLdeXSSKo9qsbPPmzcydO5fjjjuO2bNns2nTJgAefvhhJk2axNSpUzn99NOpqanh57f+gsd//3hKRzcdZUcoxpg2xXbGS85ckpH26XDTE++wYWdFu/U2lFXgOA5fe3AreeFdbdadcEQRP/jCxMPKaxuiKcfZmkWLFnHvvfdy1FFHsWbNGq699lqeeeYZbrrpJlatWkVpaSnl5eXk5uZy9beuZfO7m3jgrvt9j6MlSyjG9HLdcYfelTaUeYnjyMzG4Zfy8nJeeeUVLrzwwnhZJBIBYNasWVx22WVcdNFFXHDBBV0emyUUY9KsN/7C70rxhOCjREcSiXzprpepbqjhxxeP5JiBR/seRypUlYEDBya8pnLPPffw6quv8uSTTzJ16lTeeuutLo3NrqEY047ueg6+p9hQVpGWpNBXlZSUMHToUJYtWwaA4zisW7cOgC1btnDCCSdw8803U1JSwo4dO8gvyOdQ1aEuic0Siun1LCF0jiWE9DjllFO46KKLWLlyJcOHD+evf/1r0m0ffvhh7rzzTqZOncrEiRN58sknAfjGN77B5MmTmTx5MnPmzGHSpEnMPOUE3nvnXaZPn24X5Y0xnWPJoPuoqqqKT7/wwgtJtRk2cjjLX3iqWdmYMWMSJqDly5cfVtZ/4AB+v2JZl5yys4Riur2+fg3BZM4j/3oiGz95P9Nh9BiWUEzaWULoHDvCMD2FXUMxxhjjCztCMe2yI4zOsSMM01dYQukDLCF0jiUEY5Jjp7yMMaY1S85h5B8XZTqKHsMSiun17DkK012k0n3952edyfQRk1Lqvn7ZsmXcd8c9nY47WRk55SUi/YFHgFHAh8B8VT3QSt0iYCOwTFWv9cqygDuA0wAHuEFVH0t74Blip6yM6X2S7b5+xRur+Ooli1rtvj4SiRAKJd6Vz5s3j/GnTPYn4CRk6gjlOmClqo4FVnrzrbkZeL5F2Q3AHlUdB0xIsNwYY7q1ZLuvT+Tkk0/mhhtuYPbs2dxxxx08/vjjzJw5k+nTp/O5z32OPXv2AHDvvffy3zfcAsCCBQv4+te/
zkknncSYMWPiXbf4KVMX5c/DPboAWAqsAr7bspKIHAeUAk8DM5os+mdgPICqOsAn6Qu18+wIo3PsdJXx3V+ug13r26+36y1y1HGvo4Rz2647ZDKctdif+JJQUVHB6tWrAThw4ADnnnsuIsKdd97Jbbfdxq233npYmz179rBmzRrWr1/P/PnzmTdvnq8xZSqhlKpqGYCqlonI4JYVRCQA3AZcCpzRpLzYm7xZRE4DPgCuVdXdaY/aGGO6iYsvvjg+vW3bNubPn8+uXbuoq6tj3LhxCducf/75iAhTpkxhx44dvseUtoQiIiuAIQkW3ZDkKq4GnlLVj0WkaXkIGA6sUdVvisg3gf/BTTyJ4lgELAIYOXJkkh/dnB1hGNPLJHskseQcahtq2HbB3d2m+/qY/Pz8+PQ111zD9ddfz9lnn82KFStYvDjx9mVnZ8entcVbIP2QtoSiqnNaWyYiu0VkqHd0MhTYk6DaicApInI1UABkiUgV8D2gGoidAPw9cGUbcdwN3A0wY8YM/79B0y47ZWVMeh08eJBhw4ahqixdujRjcWTqovxy4HJv+nLg8ZYVVPUSVR2pqqOAbwMPqOp16qbVJ2i8BnMGsCHtERtjjI860319SzfeeCPz5s3j1FNPpbS01McoOyZT11AWA4+KyJXANuAiABGZAXxFVa9qp/13gQdF5GfAXsBedmGM6fb86r7+xRdfbDZ/4YUXNnslcMxVV13FrPNPA+Chhx5qNRa/ZCShqOo+mlxob1K+Fjgsmajq/cD9TeY/AmanL0JjjAEW/plt1n190qwvL9MuuwZijEmGdb1ijDHGF5ZQjDHG+MISijHGGF/YNZQ+wK6BGJOahU8vpLqhhhtPvCXTofQIdoRijDFdJNZ9/ZtvvsmJJ57IxIkTmTJlCo888shhdf3ovh5gw7p3eGHlal/ib48doRhjTBfLy8vjgQceYOzYsezcuZPjjjuOuXPnUlxcHK+TbPf17dnw1jtsfncTi770z77E3hY7QjHGmC42btw4xo4dC8ARRxzB4MGD2bt3b9LtN2/ezNy5cznuuOOYPXs2mzZtAuDhhx9m0qRJTJ06ldNPP52amhp+fdsdPPnY8pSObjrKjlCMMX3Ora/dyrv732233rv738VRhxtf/g/y2um+fnz/8Xz3+MPewtGu1157jfr6eo466qik2yxatIh7772Xo446ijVr1nDttdfyzDPPcNNNN7Fq1SpKS0spLy8nNzeXq791LZvf3cQDd93f4dg6yhJKD2AX1Y3pncrKyrj00ktZunQpgUByJ4zKy8t55ZVXmnW1EolEAJg1axaXXXYZF110ERdccEFaYm6LJRRjTJ+T7JFE07u8/O6+vqKignPOOYdbbrmFE044Iel2qsrAgQMTXlO55557ePXVV3nyySeZOnUqb731lp8ht8uuoRhjTBerr69n3rx58aOJjigpKWHo0KHxV/g6jsO6desA2LJlCyeccAI333wzJSUl7Nixg/yCfA5VHfJ9GxKxhGKMMV3s0UcfZfXq1dx///3x24E7chfXww8/zJ133snUqVOZOHEiTz75JADf+MY3mDx5MpMnT2bOnDlMmjSJmaecwHvvvMv06dPtorwxxvQWsS7jFyxYwIIFC5Jqk6j7+jFjxiR8f8ry5csPK+s/cAC/X7GsS944aQnFGGNaseTMJWy07uuTZgmlC9hdWsaYvsCuoRhjjPGFHaEYY7q16377oTuR4ou+4+2TvzO3mSH7692JgT2z/RH7ajvVviMsoRiTZr7tEPto+2ytSa2hT+0D6vTo9l3JEorp9TK9Q+zpMr1Dz6SPLr2MSM0hBv7XTZkOpUewaygm7a777YeNO+UMtO/psrWmUzvlzrY3/kml+/qZ55zHkKkzUuq+ftmyZfziviW+xd8eO0JJQl+/S6uv/0LvLNuZm5Y60n39Wyv/ysVfvbbVBx8jkQihUOJd+bx58/jMhLH+b0ArLKH0AZYQOscSgvHbuHHj
4tNNu69vmlDacvLJJ3PqqafywgsvcMEFFzB69Gh+9KMfUV9fz6BBg3jooYcYPHgw9957L6+/8DyLr/8uCxYsYMCAAfz9739n165d3HbbbcybN8/X7bKE0gNYQjDGX7t+9CPqNrbffX3tu++i0Sif3PADynPz26ybfcx4hlx/fYdjSaX7enA7l1y92n0T44EDBzj33HMREe68805uu+02br311sPa7NmzhzVr1rB+/Xrmz59vCaUnsoSQWXaEYbqrVLqvj7n44ovj09u2bWP+/Pns2rWLurq6ZkdATZ1//vmICFOmTGHHjh2dij0RSyim27OEYPyW7JFE07u8+n1qkq8xpNp9fUx+fuMR0zXXXMP111/P2WefzYoVK1i8eHHCNtnZ2fFpVe140O2whJIEO8IwxvipM93XJ3Lw4EGGDRuGqrJ06VIfIkyN3TZs0s5uWzWmuc52X9/SjTfeyLx58zj11FMpLS31MdKOsSMUY4zpIql0X3/k8GG8/MSyZmUvvvhis/kLL7yw2SuBY6666iouOsU9nfbQQw8ljMVPllCMMaYVRz74AAffezvTYfQYllBMu+x0lTEmGXYNxRjTZ6TjzqbepLPfjyWUPsAuihsDOTk57Nu3z5JKK1SVffv2kZOTk/I67JSXMaZPGD58ONu3b2fv3r0dalezezcAuU4wpc/tSe1zcnIYPnx4Sp8DGUwoItIfeAQYBXwIzFfVA63ULQI2AstU9Vqv7J+A6wEFdgILVPWT9EdujOmJwuEwo0eP7nC7p/6fe/fU2Ss2pPS5Pb19R2TylNd1wEpVHQus9OZbczPwfGxGRELAz4HTVXUK8BZwbRpjNcYY045MJpTzgNgjnUuB8xNVEpHjgFLgmabF3pAvIgIU4R6l9Ep2DcQY0xNkMqGUqmoZgDce3LKCiASA24DvNC1X1Qbgq8B63EQyAbgv0YeIyCIRWSsiazt67tQYY0zy0ppQRGSFiLydYDgvyVVcDTylqh+3WG8YN6FMB47APeX1vUQrUNW7VXWGqs4YNGhQJ7bGGGNMW9J6UV5V57S2TER2i8hQVS0TkaHAngTVTgROEZGrgQIgS0SqgMe89X/gretR2r4GY4zpxlQVHAccx51uMi+Oe5tvtKKi+XJvUMdxb83Ba6OKOurOqxKKKCjUbd3aWC9R26brVLcNqmTVu59fs25d82W03S62PKfWna5asybepOny+HSsvTcdiyuvxi2rXLkyXjf+HcTWR5N1qrdNGgVVCmoav79gUVHq/5GSkMnbhpcDlwOLvfHjLSuo6iWxaRG5ApihqteJyBHABBEZpKp7gc/i3gXWLdn1j95JHQeNRKChAY1G0UgEbYhApMGd9oasekUUqt94w63vOGgkCtFIvB3RKBqJotEW05EoRZWKAJ/cdTc43jInClHHnY86XjsnXt50ftA+d4ey/Wtfd5c76n5GbAeesMxx59XhiN1u/B98/vNuPcdB1Tl8Oho9PDE4DkdWuZ//7rTp8UTRLGm081zIKG+86fiZKf13GuGNt5x1dkrth3njD790cZv1WjPUG3985VUptY919bj9mtTuO4qdl4lsWktwxmdSWkeyMplQFgOPisiVwDbgIgARmQF8RVVb/fZVdaeI3ASsFpEG4CPgivSHbLqaNjQgjrtDaygrQ+vr0fp6nLr6+LTW17VS7i4rPui23/WjH6ENDWhDg5sEYkO9lwCaljUZRpS57d+beYLbzksUOE5S2xDbIX305UvarNeaAd54709/2lgoAsEgEghAKOSO4/NBJBCEYAAJBMlucH+w1m35wCsPIk3bBwJuXREkFEAkCKIgIKJItoMIZJcWAorg4K7RQbyxO92kXKPuL2WiVO/Yi4iSN6TEraOO++sZByEKjjuNRhvbaaRxfdK4ye4E8XKJz9OkXpME1bJOk/bSol28vghI0P1eJECkrhYQwgVF8TICXh1x5yUQ8NqIt7yx/cEP3gWE4vFT4vXdZW5dic+3GAIC
gSC7XngWFIaefra3TJrE4NVFGteLePEEQGDrHx9CgfBRx3TgX11qMpZQVHUfcEaC8rXAYclEVe8H7m8yfydwZ/oiNK1RVbSuDqemBq2uxqmtxamuwampRmtqcGpqcWoa54srFHFg1y3/hdbV4tTWobW1OPV1aGy6LnEZ0Wj8F+r7p6f266oYdzd38I/LkHC4zSGQmwNFhc3K9u/7GBU48vOfR0IhJBxyd+KhcON8MNhsXkKNdd744ddR4NO33ouEgkgwCMEQEvJ27qGQV9Z0OhSv+8wFJwIw94nXkGgtRGuRSA00HIL6Q1BfDfVV0FDdYtpdvvPp9QSCypBPfwoitdBQ02JcC5EaiNYn/P5GjIxNPdXONy0QyoZgtjsOZUMwi8qBFTiO0O+YfAhmQTDsDVmN40DLssbpjff9AlVhwrXf93bUYQiE3DqBULvTqxddgCqcuvQZr32ocZBA8/lAsEnm8rZ6zgQg9ec4no+1f+DplNpvfMxtP3rR3Sm133LvnwA4pmRoOzU7z56U7wtUCSg07NyJc+gQzqFDRL2xc6jaHVdXx5e1HIbtdhPCplknu0mkpqbd0xRNleD+hj24fDmB7GwkJwfJziKQnYPk5BDIzyc4YACBnGwkOwfJySaQ5dYL5GTz7v2/QAUmf/tmJCuryZCNZIUJxOazs5stj5X/5cwpIMLZK/6e0te3bs6TAMz8/n90vLEqdbcpoZBS8KmBUFcJdRXeuBKqKw8vazZUcMaJZYSCSuAnHXiCWQIQzoesfPoV1hONBtxEE8qBgsHuOJx7+DicC6FcCOfEx699/+s4Kpzws4chlNUiYWS7ZaEcbwcth4XyQmyHetuzHf/+gK0/XALAhOP/JaX2VdVhd6J/xx9qNB1jCaWbU1XEUQIO1G3eTLSyEqeykmhFJU5VJdHKKpzKCre8opJoVZNxZRXRykpGV7vrev8zhx0QNhcMEsjP94Y8d0efn09DEJwwDPrsHAI5uQTycpHc3Ph0IDcXaWP6r+fN8Hbor6b0HVQ89ksAir/4xZTaJ9rJdYwSDCgc3A415VBbnty45gDUHmTuSQ3uan51fOsfEcqF7MLmQ/GRkF3Izg1PEI0KR132b5CVD1l5kFUA4Txv3hvCXnlWnruD97Y7/gv5f1aktPWflH/XnRjx6ZTam77DEkoXcmpriZaXEz14kGj5QW+63J0+WB5f5sTnDxI9eJBR3pmILV84N+F6JRwmUFhIsLDQHRcVEho8mEBhAcHCIj744/04ApP+/RYCeXlNkkbzQbKy3PPHLbzt7ZCOv/HG1Da80zt0nzXUQvW+FsP+BGXuMPekMoIB4KcTE69PApDTD3KKIbfYHfcbHp9/98H7aIgEmPy92yG7yE0WWQXNk0cw3Gq4G379AgBHnfqdVusY0x1YQklCa3dpObW1RPftI7J/P9H9+4ns2090/z5vvN8t37ePEWVKIArvTZve6mdIdjbB4mKC/foR7NePrFGjCRa705t/fx+OwNQf3E6gsIhgYUHjuKiIQHZ2m/G/vsLtkKA4wRvdeo26KqjaDVV7vHHjMGPCJ2SFHfjZZDi0z7320JrcEsgb4A7FI+GIaXz4xJ9oaAgw/ms3NyaN3JLG6axC72JoYlsWPwrA5Em9+Ps3BksoSSmsUrLr4eOvXk1k/z6iXsJwqqsT1pecHEIDBhDs35/QoEHUfAjRAIz91296CaPYTR5ewggWFxNoo8votU//BoCis85Kx+Z1X6pQc4Ci/HpysqLw+tIWCWNP4zhRkpAgFAwmO+xQ1xCAI2d5yaJ/Y9JoOuQUQ/DwP4n3fvkcAOOPuyLNG2xMz5ZUQhGRWcCbqnpIRBYAxwI/V9WP0hpdN5FT5w4Nu3YR6t+frCOPJFTSn+CAAYQG9HcTR39vvn9/Anl5zdqv904ZDfyX1C4q9lr11VCxAw5+DAd3uNcoKra749h8pIaTYwd2T3zNHecUQ0Gpe3F5+IzG6WbjUsjtD4EAa2LXEH5h
NwUak07JHqH8LzBVRKYC/47bb9YDwKnpCqw72TvAvQZw9rI/ZjiSHqa2AvZvYciAGvJyIvDnbzdPIDX7WzQQNxH0Gw6lE2DcXCgaxus/+wm1dUFm3f+cd4dS26f4jDGZkWxCiaiqen1w/VxV7xORy9MZmOkBVN2L2fu3uMOBrY3T+7dCtft6mmNjz1Ot/72bLIqGwfDjod8w6DfCne83HAqHuregtrD7+3e4E8UjDltmjOk+kk0olSLyPWABMFtEgkDrt6WYXiUrHCU/NwJvPNgicWx1n5+IEzcx9B8Nx3we+o+BktG8cN23qK4LMvev72ZsG4wx6ZdsQvkS8GXgSlXdJSIjgZ+kLyyTEbUVsPdd2LMBdm9wx3s2Mmem9yLM5de6D68Vj3STxYiZUDLane4/BkqOTHg6qrLa+u00pi9I+ggF91RXVETGAeOB36UvLJNWkTr4ZBPs2Qi733HHezbCwW2NdcL5MHg8fOosNjy2nKrqEMffvdI9RZXgTihjjEl2z7Aatxv5EtzX9a7FPWpJrbc703Wq9jK4fw1F+Q3w6OVu4tj3vtc5H26/SAPHwYjj4bjLoXQiDD4G+o2MP1vx4S//5ta1riuMMW1INqGIqlZ7PQP/UlV/LCJvpjMwk4KGGihbB9vXwo7XYcdaKN/GjAle11tl62DwBJhwrps0Bk+AAUe3+ZS2McYkK+mEIiIn4h6RXOmVBdMTkkmK48C+zW7i2L7WTR673wEn4i7vNwKGHQfHL+Ll//kZFVVh5j5jvwGMMemTbEL5N9xX7C5T1XdEZAzwXPrCMi1lhaPw3l8ak8eOf0DdQW9hIQw7Fk76mvug37AZUFgab3ug4n8zFLUxpi9JKqGo6vPA8yJSKCIFqroF+Fp6Q+vjqvbCh6th62pOPW4X+blR+N3FbncipRNg0gWNyWPguDb7kjLGmK6QbNcrk3GfjO/vzspe4DJVfSedwfUpNQfgwzXw4QuwdbV7yy5AdhFV1WG27crnmB8+AkOnud2TG2NMN5PsKa+7gG+q6nMAInIacA9wUpri6v3qqmDbK7D1eTeBlK0D1H0vxpEnwpT5MGo2DJ3K63OnAHDMkfZ1G2O6r2QTSn4smQCo6ioRyU9TTL1SQNRNHFu9I5Ada90L6MEstxuS066D0bPdC+nWV5UxpgdKNqFsEZHvAw968wuArekJqRc5uB3e+wufnvgJ/fvVwdIvuNdAjpjuXkAfPdt92txOYRljeoFkE8o/AzcBfwQE90HHhekKqsdShV3r4b2n3KFsHQC52SG2leUz+jtLYOSJkFOU4UCNMcZ/yd7ldQC7qyuxSD189CK8+5R7W2/FdkDcI485N8H4c1h98XkAjB43N7Oxmh7p/vluD8xnW/uMtL/pkmP6dPuOaDOhiMgTgLa2XFUTv+S8t6sph83Pukch769we9wN5cLRZ8Dp34Oxc6FgUKajNJ5M75A6276n+yh8VI9ub5LX3hHK/3RJFD1AbnYEXvlfN4l89JJ7QT1/MEw8Hz51Dow5FcK5mQ6zW7IdcmbZDtl0lTYTivdAY5937Ph9DBlYC09fB4PGuxfUP3W2e0dWD3ig0HboPZvt0E1PkeyDjes5/NTXQdxeh29R1X1+B9ad7DmQw4GKLI756SoY0PV/3LZD79ksIZi+Itm7vP4CRIHfevMX497tdRC4H/iC75F1I9t3u4/cHJNiMrGE0LNZQjAmOckmlFmqOqvJ/HoRWaOqs0RkQToC604sIfRslhCM6RrJXgAoEJGZsRkROR4o8GYjvkdljDGmx0n2COUq4DciUoB7qqsCuNLrfuW/0xVcX+eoQ320niiKA+ys2kl9tJ4Gp6FxiDZQ79QTcSI0RBuaLYvV3Rd0UODXb/6aiBMhqlGiTpSoRok4ESIaaTYfWx5bti0cRYGFTy/EUSc+RDXaOI+D43jjFnU+yXLfDnnaI6eh3qU4VcXBQVXdMqX5vLf9qkp9lgMo0x+c3vjltLiipy0Kms47
4gAw9YGpCAIQHzeOGstFmteplToAZv7fTESE2P/c/0u8LCCBZuuI/a9G9gPwuT98Ll4nIAECEkjYPlYekAAiQq18BAiXPHUJAQKHr0MkXi4izdYbIECd7ATg289/mwCNdZp+TtP1tFxeL3sBuP3125t9TlCCCT87to7Y8gbKAXj0vUcTfm6r20GAYCBIlEMAvLTzpWbrD0ig1Rhi6w9KEId6AD6u+DjhtreMu9l6JdDs32PTfx/mcKLa6mMmh1cW6ee1KU9fSOkzY8YMXbt2bYfbzb97GgCPLnJfUKWq1Dv1VDdUUxOpaRxHEs//9u934ABnTLiQukgdtdFa6qP11EZrqYvUURdtMXhl9U69n5sPuDuhkIQIBoLxcVCCBANBwoFwfDooQUKBECEJ8cHetwGYOHTGYTuB2B9eUIKN5QQIBALxP/JVm5YjwJxjLozvZOM73JY74ybzsZ32g+v/BMAVUy5sti3xpNCK2B/+b9b9AYCFLdo3TW6x+Xgi0sY6D739OABfnvgFHHUa63rJ77Ak6JXF6j2+aQUA5xx9WrM6sYQZm44v85JtLMm++LH7b/aEYdOaLVdVL2m7yTwam1YHx1EvySsfVewAYGjBIBxVVB0cvHHL9cWTeuO4Luom1GAgCLHYcWjjEbXeTWM/OAK4v0i8Hxjuv9wWZQEanAiokB3KarIsEK8TqxdbR8vyivoqUKE4pyje1o0gEP97aWwT+7tpnC+r2gMIj877NRNLR6a0ySLyuqrOaK9esnd59QN+AMz25p8HfqiqB1OKrocpCzlUi/KZRz8TTxSxHUBSgu65xee2PUdOKIfsYHazoSi7qNl8TiiHrGAWOUF3/PBrv0AUrj71JsLBMOFAmKxAVnw6NmQFsxrng2FCgRDhQJgrl85GgEf+5R/xHXdHxBLqkjOXdLgtwMzNGwH4wYk/SKn9I2+tAeDrx349pfYPvbkKgK8dm3xnD6pKXcShpj7K795Yh2qQc0d8hYaoQ13EoSHqUO+NG8u0WXmsXsOebFQDhPt/gYjj4ESUBsetH4k2tos4Dg3xZQ4Rr7yq/GRUA7y+fQARp7G8IergdGCfXtnB7y2Jb6n5IIqIEhAIBhqna50aBIf8rFxEQMRB0Pi0u/9Tr0yuTCKKAAAUQklEQVTd3aM4iADi8EnNfkSUQXklIAq47d1pt53GppuNHRBlz6G9gFJaMMBL/I3xNt8Gp7FMnHj53up9gPv5Kk3q0vijATdFN1tPbL629iCghLML3DLBra+NMag6aJM2sZ834KARARxq6oJeeeyzI823Bz18XhQCEUCprKvz/V9AS8me8voN8DYw35u/FFgCXJCOoLqbsEIOwuzhs8kN5ZIbyiUvnOeOQ3nkhr1xK/OX3PdpAB69YlVKn//My3cAMG/svJTax3/PpJBMeoKGqENVbYTK2giVdQ3uuDZClTd9aN8UHCfMTU+8Q21DlJr6KDUNUWoaHGrj003K66PURqI0Hrx/CYC5P1udYoSTQRwee2M7WcEAoaAQCgTICgUIBYRwMEA46I5DQaEgHHKnA0I4FGBH7ToQh8+On0DYa9u0fryut56QN++uP8D1q36MiPKzz11PKBAgEICgCKGgEBBpLAsIoYBbFgw0Dl949F8QlGe+vISACIEA7rjltJDwdNDMJe6R4asLH0vp24u1f6mT7VcvXNyp9i8svLNT7V9Z+Nt2arbd/tWFf+hU+xNGjk2pfUckm1COUtWm5wtuEpGUX1AuIv2BR4BRwIfAfK+/sJb1osB6b3ZbrKsXERkNPIz7wq83gEtV1f/zQ56BUXdHfONJN6brI7q1rrhLqiHqcKC6ngOHGrxxPQeq3enKPcfjRLP4yoOvU1nX0CR5RKisbaC2ob2jxRmAwx9e305uOEhuVpDccJCcsDseVJjdOJ8VcOuEg+R49W5/7W5Eovz3Z75BVsjdaWeFAmQFA4SDAa/MnW+6POwtP2npF4HO7FD/C4D/vuAbKbX/4evb
ADjtU4NTah8M1QBQkp+VUnvTdySbUGpE5GRVfRFARGYBNZ343OuAlaq6WESu8+a/m+hzVXVagvJbgZ+q6sMicidwJWAvTu9Gahui7K2sY09lLbWVR+JEcvnFys0cqK6nvLqB/YfqKa+uZ391PeWHGqisa+NmQRlPIFjHB3urKMwJUZyXxfD+eRTlhCjMCVOQHaKwyXS8PMctP/uRBUjASXmHfuemTQCcM2VoSu2N6SuSTShfBZbGLsoD+4ErOvG55wGnedNLgVUkTiiHEfeY+jPAl5u0vxFLKK3y6whDVamojbC3spY9FXXsqayLJ409lXVeWS17K+uoqG2aIM4A4PZnN1GQHaI4L0z//CyK87IYPTCf4rwsSvKy6J8fpjgvy1vm1inJy+LUh9wzrc+mmBAk0IHrXcaYlCXbff2bwFQRKfLmKzr5uaWqWuatq0xEWjsWzxGRtbjPuixW1T8BA4ByVY3tsbYDw1r7IBFZBCwCGDkytTsc+oqoo5QdrGH7gRo+3l/tjg9Us3/bWTiRAsZ//2nqIofvnLNDAQYXZTO4MIdxpYXMOnoggwvd+UFF2XzrbzcQCNXw0sIHyAr1zus4xpj2u6//ZivlAKjq7W20XQEMSbDohg7EN1JVd4rIGOBvXp9iiZJZW13s3w3cDe5twx347F7HcZS9VXV8vL+ajw9Us32/mzBiiaOsvJZIk9uGRGBIUQ4ghHP2cOm0KQwuzGFwUTaDYgmjMJuinFCb9+aHX3Kfw7BkYkzv1t4RSmGqK1bVOa0tE5HdIjLUOzoZCuxpZR07vfEWEVkFTAceA4pFJOQdpQwHdqYaZ0/Q0VNWh+oifLC3ik27q9i8p5ID2+cQre/H+P98mvoWRxiDCrMZXpLL9BElfGFKLiP65zG8JJcRJXkMLc4hOxSM3yVywzn2jjVjTOva677+pjR97nLgcmCxN368ZQURKQGqVbVORAYCs4Afq6qKyHPAF3Hv9ErYvi84VBdh854qNu+ujI837a5iR3nj/RJZwQBOsIBQ9n6uOHY6w5skjOElueSEgxncAmNMb5LsRfk4EXlDVY/t5OcuBh4VkSuBbcBF3rpnAF9R1auAY4C7RMTBfS5wsapu8Np/F3hYRG4B/gHc18l4ujXHCRGtK+HRtR83SR4tEkcowFGDCjjuyBIu/vQIxpYWMra0gCP75zHrgYsA+N7Z12ZqE4wxfUCHEwrx3o9S570/5YwE5Wtx+w1DVV8CJrfSfgtwfGfj6G5UlV0VtWwsq2DDzgo2lFWwsaySvZ9cCgj/vu0tsr3EMWNUCV8uHcnRgwsYV1rIiJJcQkG7RmGMyZxUEsqffY+il0t0DaQh6vDB3io3ceysYOMud3yguiFe58gBeUwYWsQeXU0o+wCPf/kWRvTPIxiwzumMMd1PhxOKqv5HOgLpzZxomEjdAJas2Ro/8ti8u4r6qHuBPDsU4FNDCpk7cQgTjihiwtAiPjWkkMKcMAAzl/wIgFED8zO2DcYY055kO4espPVXAH/LOwVlcG/N3fLJId7YdoB/bDvAGx+Vs3f3AkC46eMNDMjPYsIRRSycNSqePEYPzLfTVcaYHi/ZI5TbcW/N/S3uNZSLcZ8xeQ+348jT0hFcT1BZ28CbH5fzxkflvLHtAG9+XM7BGve0VWFOiOkjS9geeZ5wzic8c/mtDCrMtvcpGGN6pWQTypmqOrPJ/N0i8oqq/lBErk9HYN1RoqOPTXsqUXUfAhw7uICzJg1h+shijh1ZwlGDCggEhJlLbgVgcFFOhrfAGGPSJ9mE4ojIfCDWf/IXmyzr9U+fb6w6h7pDI5l+87Pxo48i7+jjrMlDOHZkCdNGFlPkXfMwxpi+KNmEcgnwc+DXuAnkFWCBiOQCvf7hhoaaUqIN+Zw11U0exx5ZzJiB7tGHMcYYV7KdQ24BvtDK4hf9C6d7Khr6PCKw+MIFmQ7FGGO6raRuLRKRcSKyUkTe9uaniEifuX3YrqEbY0z7kr1X9R7g
e0ADgKq+hXunlzHGGAMkn1DyVPW1FmVtvGLPGGNMX5NsQvlERI7Cu6NLRL4IlKUtKmOMMT1Osnd5XYP7kqrxIrID2Ip755cxxhgDJJ9QdgBLgOeA/rhvTbwc+GGa4jLGGNPDJJtQHgfKgTfo5W9HNMYYk5pkE8pwVT0zrZEYY4zp0ZK9KP+SiCR82ZUxxhgDyR+hnAxcISJbgTrcHodVVaekLTJjjDE9SrIJ5ay0RmGMMabHS7Yvr4/SHYgxxpiezV4TaIwxxheWUIwxxvjCEooxxhhfWEIxxhjjC0soxhhjfGEJxRhjjC8soRhjjPGFJRRjjDG+sIRijDHGF5ZQjDHG+MISijHGGF9YQjHGGOMLSyjGGGN8kZGEIiL9ReRZEdnsjUtaqRcVkTe9YXmT8v8TkfdE5G0R+Y2IhLsuemOMMYlk6gjlOmClqo4FVnrzidSo6jRvOLdJ+f8B44HJQC5wVVqjNcYY065MJZTzgKXe9FLg/I40VtWn1AO8Bgz3OT5jjDEdlKmEUqqqZQDeeHAr9XJEZK2IvCIihyUd71TXpcDT6QvVGGNMMpJ9BXCHicgKYEiCRTd0YDUjVXWniIwB/iYi61X1gybLfw2sVtUX2ohjEbAIYOTIkR34aGOMMR2RtoSiqnNaWyYiu0VkqKqWichQYE8r69jpjbeIyCpgOvCBt44fAIOAf20njruBuwFmzJihKWyKMcaYJGTqlNdy4HJv+nLg8ZYVRKRERLK96YHALGCDN38VMBf4J1V1uiRiY4wxbcpUQlkMfFZENgOf9eYRkRkicq9X5xhgrYisA54DFqvqBm/ZnUAp8LJ3S/F/dm34xhhjWkrbKa+2qOo+4IwE5WvxbgFW1ZdwbwtO1D4jcRtjjGmdPSlvjDHGF5ZQjDHG+MISijHGGF9YQjHGGOMLSyjGGGN8YQnFGGOMLyyhGGOM8YUlFGOMMb6whGKMMcYXllCMMcb4whKKMcYYX1hCMcYY4wtLKMYYY3xhCcUYY4wvLKEYY4zxhSUUY4wxvrCEYowxxheWUIwxxvjCEooxxhhfWEIxxhjjC0soxhhjfGEJxRhjjC8soRhjjPGFJRRjjDG+sIRijDHGF5ZQjDHG+MISijHGGF9YQjHGGOOLUKYD6AlG1X870yEYY0y3ZwmlC1hCMsb0BZZQegBLSMaYnsASSh9gCckY0xUsoZh2WUIyxiQjIwlFRPoDjwCjgA+B+ap6IEG9KLDem92mque2WP5LYKGqFqQ14E565F9P7NPtjTF9Q6aOUK4DVqrqYhG5zpv/boJ6Nao6LdEKRGQGUJzGGONsh9o5ry58rFPtJwwt8ikSY0w6ZSqhnAec5k0vBVaROKEkJCJB4CfAl4F5PsdmupklZy7pVHtLSMZ0jUwllFJVLQNQ1TIRGdxKvRwRWQtEgMWq+iev/Fpgude2C8I1PZklJGO6RtoSioisAIYkWHRDB1YzUlV3isgY4G8ish6oAS6i8QinvTgWAYsARo4c2YGPNsZlCcmY5KQtoajqnNaWichuERnqHWEMBfa0so6d3niLiKwCpuMmlKOB972jkzwReV9Vj25lHXcDdwPMmDFDO7FJxqTEEpLpKzJ1yms5cDmw2Bs/3rKCiJQA1apaJyIDgVnAj1V1A02OfESkqrVkYkxvYAnJ9BSZSiiLgUdF5EpgG+4prNidW19R1auAY4C7RMTB7cRysZdMjDEdYAnJdJWMJBRV3QeckaB8LXCVN/0SMDmJdXXrZ1CM6ekynZAsofUc9qS8MSatOpuQOssSUtexhGKM6dZ6ekLqSwnNEooxplfrbELq6QmtK1lCMcaYNMp0QuvKhGQJxRhjerGuPMKyd8obY4zxhSUUY4wxvrCEYowxxheWUIwxxvjCEooxxhhfWEIxxhjjC0soxhhjfGEJxRhjjC8soRhjjPGFqPadlxiKyF7goxSbDwQ+8TGcnsC2uW+wbe79Oru9R6rqoPYq9amE0hkislZVZ2Q6jq5k
29w32Db3fl21vXbKyxhjjC8soRhjjPGFJZTk3Z3pADLAtrlvsG3u/bpke+0aijHGGF/YEYoxxhhfWEJphYhcJCLviIgjIq3eHSEiZ4rIeyLyvohc15Ux+k1E+ovIsyKy2RuXtFLvx953s1FEfiEi0tWx+qUD2zxSRJ7xtnmDiIzq2kj9k+w2e3WLRGSHiNzRlTH6LZltFpFpIvKy92/7LRH5UiZi7Yz29kciki0ij3jLX/X737EllNa9DVwArG6tgogEgV8BZwETgH8SkQldE15aXAesVNWxwEpvvhkROQmYBUwBJgGfBk7tyiB91u42ex4AfqKqxwDHA3u6KL50SHabAW4Gnu+SqNIrmW2uBi5T1YnAmcDPRKS4C2PslCT3R1cCB1T1aOCnwK1+xmAJpRWqulFV32un2vHA+6q6RVXrgYeB89IfXdqcByz1ppcC5yeoo0AOkAVkA2Fgd5dElx7tbrP3RxlS1WcBVLVKVau7LkTfJfPfGRE5DigFnumiuNKp3W1W1U2qutmb3on7o6Hdh/m6kWT2R02/hz8AZ/h5hsESSucMAz5uMr/dK+upSlW1DMAbD25ZQVVfBp4Dyrzhr6q6sUuj9Fe72wyMA8pF5I8i8g8R+Yn3a7CnanebRSQA3AZ8p4tjS5dk/jvHicjxuD+aPuiC2PySzP4oXkdVI8BBYIBfAYT8WlFPJCIrgCEJFt2gqo8ns4oEZd36trm2tjnJ9kcDxwDDvaJnRWS2qrZ6ajDTOrvNuH8npwDTgW3AI8AVwH1+xJcOPmzz1cBTqvpxT7lE5sM2x9YzFHgQuFxVHT9i6yLJ7I/Sus/q0wlFVed0chXbgRFN5ocDOzu5zrRqa5tFZLeIDFXVMu+PKtF1gnnAK6pa5bX5C3ACbVxryjQftnk78A9V3eK1+RPuNnfbhOLDNp8InCIiVwMFQJaIVKlqt73xxIdtRkSKgD8D/6Gqr6Qp1HRJZn8Uq7NdREJAP2C/XwHYKa/O+TswVkRGi0gWcDGwPMMxdcZy4HJv+nIg0VHaNuBUEQmJSBj3gnxPPuWVzDb/HSgRkdj59M8AG7ogtnRpd5tV9RJVHamqo4BvAw9052SShHa32fsbXoa7rb/vwtj8ksz+qOn38EXgb+rnw4iqakOCAfeX+HagDvei81+98iNwTwXE6p0NbMI913pDpuPu5DYPwL0DZrM37u+VzwDu9aaDwF24SWQDcHum4073NnvznwXeAtYD9wNZmY493dvcpP4VwB2Zjjvd2wwsABqAN5sM0zIdewe387D9EfBD4FxvOgf4PfA+8Bowxs/PtyfljTHG+MJOeRljjPGFJRRjjDG+sIRijDHGF5ZQjDHG+MISijHGGF9YQjHGRyJS1cn2fxCRMd50gYjcJSIfeD3grhaRmSKS5U336QeTTfdjCcWYbkJEJgJB9Z7IB+7FfYp5rLo94F4BDFS347+VQI/rXt30bpZQjEkDcf1ERN4WkfWxd2uISEBEfu0dcTwpIk+JyBe9ZpfgPcEtIkcBM3G7AHEA1O1F9s9e3T959Y3pNuyQ2Zj0uACYBkwFBgJ/F5HVuO+SGQVMxu3xdiPwG6/NLOB33vRE4E1Vjbay/rdx30VjTLdhRyjGpMfJwO9UNaqqu3FfUvVpr/z3quqo6i7cVwHEDAX2JrNyL9HUi0ihz3EbkzJLKMakR2t9vrfVF3wNbl9LAO8AU733krQmG6hNITZj0sISijHpsRr4kogEvV6KZ+N2xvcicKF3LaUUOK1Jm43A0QCq+gGwFrgp9kY9ERkrIud50wOAvara0FUbZEx7LKEYkx7LcHsnXgf8Dfh37xTXY7i9WL+N22vzq7hvzQP3PRynNVnHVbgvjHpfRNYD99D4fovTgafSuwnGdIz1NmxMFxORAlWt8o4yXgNmqeouEcnFvaYyq42L8bF1/BH4nqq+1wUhG5MUu8vLmK73pIgU476z/GbvyAVVrRGRH+C+93tba429lyf9yZKJ6W7s
CMUYY4wv7BqKMcYYX1hCMcYY4wtLKMYYY3xhCcUYY4wvLKEYY4zxhSUUY4wxvvj/Go0ztuLPr1sAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the cross-validated score curves (mean with std error bars) for each penalty.\n",
    "cv_results = grid.cv_results_\n",
    "n_Cs = len(Cs)\n",
    "number_penaltys = len(penaltys)\n",
    "grid_shape = (n_Cs, number_penaltys)\n",
    "\n",
    "# Reshape the flat per-candidate arrays into (n_Cs, number_penaltys) tables so that\n",
    "# column i holds the curve over C for penaltys[i].\n",
    "# NOTE(review): assumes `grid` was fitted over exactly these Cs and penaltys -- confirm.\n",
    "test_means = cv_results['mean_test_score']\n",
    "train_means = cv_results['mean_train_score']\n",
    "test_scores = np.array(test_means).reshape(grid_shape)\n",
    "train_scores = np.array(train_means).reshape(grid_shape)\n",
    "test_stds = np.array(cv_results['std_test_score']).reshape(grid_shape)\n",
    "train_stds = np.array(cv_results['std_train_score']).reshape(grid_shape)\n",
    "\n",
    "# One test curve and one train curve per penalty, against log10(C).\n",
    "x_axis = np.log10(Cs)\n",
    "for i, value in enumerate(penaltys):\n",
    "    for split, scores, stds in ((' Test', test_scores, test_stds),\n",
    "                                (' Train', train_scores, train_stds)):\n",
    "        pyplot.errorbar(x_axis, scores[:, i], yerr=stds[:, i], label=value + split)\n",
    "\n",
    "pyplot.legend()\n",
    "pyplot.xlabel('log(C)')\n",
    "# NOTE(review): this label presumes `grid` was scored with neg_log_loss -- confirm.\n",
    "pyplot.ylabel('neg-logloss')\n",
    "pyplot.savefig('LogisticGridSearchCV_C.png')\n",
    "\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Classification report for classifier LogisticRegression(C=0.23357214690901223, class_weight=None, dual=False,\n",
      "          fit_intercept=True, intercept_scaling=1, max_iter=100,\n",
      "          multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,\n",
      "          solver='liblinear', tol=0.0001, verbose=0, warm_start=False):\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "          0       0.84      0.92      0.88       107\n",
      "          1       0.76      0.62      0.68        47\n",
      "\n",
      "avg / total       0.82      0.82      0.82       154\n",
      "\n",
      "\n",
      "Confusion matrix:\n",
      "[[98  9]\n",
      " [18 29]]\n"
     ]
    }
   ],
   "source": [
    "# Refit a logistic regression on the training set with the hyperparameters selected\n",
    "# by GridSearchCV, then print the classification report and the confusion matrix\n",
    "# for the held-out test set.\n",
    "# solver='liblinear' is passed explicitly: it accepts both 'l1' and 'l2' penalties,\n",
    "# whereas newer scikit-learn releases default to 'lbfgs', which rejects penalty='l1'.\n",
    "Lr_penalty = LogisticRegression(penalty=grid.best_params_['penalty'],\n",
    "                                C=grid.best_params_['C'],\n",
    "                                solver='liblinear')\n",
    "\n",
    "Lr_penalty.fit(X_train, y_train)\n",
    "y_test_pred_lrp = Lr_penalty.predict(X_test)\n",
    "\n",
    "print(\"Classification report for classifier %s:\\n%s\\n\"\n",
    "      % (Lr_penalty, classification_report(y_test, y_test_pred_lrp)))\n",
    "print(\"Confusion matrix:\\n%s\" % confusion_matrix(y_test, y_test_pred_lrp))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用正则调优得到的最优超参数训练的模型，与默认参数的Logistic Regression相比，在此测试集上的分类效果几乎相同"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3、线性SVM正则参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8272126156758984\n",
      "{'penalty': 'l1', 'C': 0.1}\n"
     ]
    }
   ],
   "source": [
    "from sklearn.svm import LinearSVC\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "# Hyperparameters to tune: penalty type and regularisation strength C\n",
    "# (7 log-spaced values from 1e-3 to 1e3).\n",
    "penaltys = ['l1', 'l2']\n",
    "Cs = np.logspace(-3, 3, 7)\n",
    "tuned_parameters = {'penalty': penaltys, 'C': Cs}\n",
    "\n",
    "# dual=False is needed so that LinearSVC accepts the 'l1' penalty.\n",
    "# class_weight=None leaves both classes unweighted; an explicit mapping such as\n",
    "# {0: 0.5, 1: 0.5} could be passed instead.\n",
    "linear_svc = LinearSVC(dual=False, class_weight=None)\n",
    "grid2 = GridSearchCV(linear_svc, tuned_parameters, cv=5, scoring='roc_auc')\n",
    "# NOTE(review): f1, accuracy, precision, roc_auc and neg_log_loss were all tried as the\n",
    "# scoring criterion; roc_auc was kept because its best hyperparameters gave the best\n",
    "# confusion matrix on the test set. Picking the criterion from test-set results leaks\n",
    "# test information into model selection -- consider deciding on a validation split.\n",
    "\n",
    "grid2.fit(X_train, y_train)\n",
    "\n",
    "# Inspect the best cross-validated score and the parameters that achieved it.\n",
    "print(grid2.best_score_)\n",
    "print(grid2.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Classification report for classifier LinearSVC(C=0.1, class_weight=None, dual=False, fit_intercept=True,\n",
      "     intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
      "     multi_class='ovr', penalty='l1', random_state=None, tol=0.0001,\n",
      "     verbose=0):\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "          0       0.84      0.92      0.88       107\n",
      "          1       0.76      0.62      0.68        47\n",
      "\n",
      "avg / total       0.82      0.82      0.82       154\n",
      "\n",
      "\n",
      "Confusion matrix:\n",
      "[[98  9]\n",
      " [18 29]]\n"
     ]
    }
   ],
   "source": [
    "# Evaluate on the test set to estimate model performance.\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "\n",
    "# Train a linear SVM with the best hyperparameters found by the grid search.\n",
    "best_linear_params = grid2.best_params_\n",
    "SVC1 = LinearSVC(penalty=best_linear_params['penalty'], C=best_linear_params['C'],\n",
    "                 dual=False, class_weight=None)\n",
    "SVC1.fit(X_train, y_train)\n",
    "y_test_pred_svc = SVC1.predict(X_test)\n",
    "\n",
    "print(\"Classification report for classifier %s:\\n%s\\n\"\n",
    "      % (SVC1, classification_report(y_test, y_test_pred_svc)))\n",
    "print(\"Confusion matrix:\\n%s\" % confusion_matrix(y_test, y_test_pred_svc))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到，线性SVM与Logistic Regression训练的模型在测试集上的混淆矩阵完全相同，分类性能没有差别"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "4、RBF核SVM正则参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8290172146375503\n",
      "{'C': 0.46415888336127786, 'gamma': 0.01}\n"
     ]
    }
   ],
   "source": [
    "from sklearn.svm import SVC\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "# Hyperparameters to tune: RBF kernel coefficient gamma (6 log-spaced values,\n",
    "# 1e-3 to 1e2) and regularisation strength C (10 log-spaced values, 1e-1 to 1e1).\n",
    "gammas = np.logspace(-3, 2, 6)\n",
    "Cs = np.logspace(-1, 1, 10)\n",
    "\n",
    "tuned_parameters = {'C': Cs, 'gamma': gammas}\n",
    "\n",
    "rbf_svc = SVC(kernel='rbf')\n",
    "grid3 = GridSearchCV(rbf_svc, tuned_parameters, cv=5, scoring='roc_auc')\n",
    "# NOTE(review): f1, accuracy, precision, roc_auc and neg_log_loss were all tried as the\n",
    "# scoring criterion; roc_auc was kept because its best hyperparameters gave the best\n",
    "# confusion matrix on the test set. Picking the criterion from test-set results leaks\n",
    "# test information into model selection -- consider deciding on a validation split.\n",
    "\n",
    "grid3.fit(X_train, y_train)\n",
    "\n",
    "# Inspect the best cross-validated score and the parameters that achieved it.\n",
    "print(grid3.best_score_)\n",
    "print(grid3.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Classification report for classifier SVC(C=0.46415888336127786, cache_size=200, class_weight=None, coef0=0.0,\n",
      "  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',\n",
      "  max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
      "  tol=0.001, verbose=False):\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "          0       0.83      0.94      0.89       107\n",
      "          1       0.82      0.57      0.68        47\n",
      "\n",
      "avg / total       0.83      0.83      0.82       154\n",
      "\n",
      "\n",
      "Confusion matrix:\n",
      "[[101   6]\n",
      " [ 20  27]]\n"
     ]
    }
   ],
   "source": [
    "# Evaluate on the test set to estimate model performance.\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "\n",
    "# Train an RBF-kernel SVM with the best hyperparameters found by the grid search.\n",
    "# kernel='rbf' is SVC's default; it is spelled out to mirror the searched estimator.\n",
    "best_rbf_params = grid3.best_params_\n",
    "SVC2 = SVC(kernel='rbf', gamma=best_rbf_params['gamma'], C=best_rbf_params['C'])\n",
    "SVC2.fit(X_train, y_train)\n",
    "y_test_pred_svc = SVC2.predict(X_test)\n",
    "\n",
    "print(\"Classification report for classifier %s:\\n%s\\n\"\n",
    "      % (SVC2, classification_report(y_test, y_test_pred_svc)))\n",
    "print(\"Confusion matrix:\\n%s\" % confusion_matrix(y_test, y_test_pred_svc))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到RBF核SVM经过正则调优之后，在测试集上的准确率（128/154）略高于前两种线性分类器（127/154），但正类（Outcome=1）的召回率由0.62降至0.57，整体差别不大"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
